nldas_nest <- read_rds("data/nldas_nest.rds")
nldas_nest
## # A tibble: 100 x 3
## installation variable data
## <chr> <chr> <list<df[,2]>>
## 1 eglin_afb tmp_f [262,957 x 2]
## 2 fort_benning_ga tmp_f [262,957 x 2]
## 3 fort_bliss tmp_f [262,957 x 2]
## 4 fort_bragg tmp_f [262,957 x 2]
## 5 fort_campbell tmp_f [262,957 x 2]
## 6 fort_carson tmp_f [262,957 x 2]
## 7 fort_drum tmp_f [262,957 x 2]
## 8 fort_gordon tmp_f [262,957 x 2]
## 9 fort_hood tmp_f [262,957 x 2]
## 10 fort_jackson tmp_f [262,957 x 2]
## # ... with 90 more rows
zoo timeseries# Function to convert hourly `dttm` to a time series (zoo) object
make_ts = function(df) {
zoo::zoo(x = df[["value"]],
order.by = df[["local_dttm"]],
frequency = 24)
}
# Map zoo ts function over each location
TMP_ts <-
nldas_nest %>%
filter(variable == "TMP") %>%
mutate(data_ts =
purrr::map(data, make_ts))
xts timeseries# Create `xts` time series
tmp_f_ts <-
nldas_nest %>%
filter(variable == "tmp_f") %>%
mutate(data_ts =
purrr::map(data, timetk::tk_xts))
tmp_f_ts
## # A tibble: 25 x 4
## installation variable data data_ts
## <chr> <chr> <list<df[,2]>> <list>
## 1 eglin_afb tmp_f [262,957 x 2] <xts>
## 2 fort_benning_ga tmp_f [262,957 x 2] <xts>
## 3 fort_bliss tmp_f [262,957 x 2] <xts>
## 4 fort_bragg tmp_f [262,957 x 2] <xts>
## 5 fort_campbell tmp_f [262,957 x 2] <xts>
## 6 fort_carson tmp_f [262,957 x 2] <xts>
## 7 fort_drum tmp_f [262,957 x 2] <xts>
## 8 fort_gordon tmp_f [262,957 x 2] <xts>
## 9 fort_hood tmp_f [262,957 x 2] <xts>
## 10 fort_jackson tmp_f [262,957 x 2] <xts>
## # ... with 15 more rows
tmp_f_ts$data_ts[[1]] %>% head()
## value
## 1990-01-01 00:00:00 64.06
## 1990-01-01 01:00:00 62.47
## 1990-01-01 02:00:00 60.89
## 1990-01-01 03:00:00 59.31
## 1990-01-01 04:00:00 57.76
## 1990-01-01 05:00:00 56.21
(Select segment with cursor to zoom)
# Sample time series dynamic graph
tmp_f_ts$data_ts[[1]] %>%
dygraph(main = tmp_f_ts$installation[1]) %>%
dySeries("value", label = "Temperature (°F)") %>%
dyLegend(show = "always", hideOnMouseOut = FALSE)
# Ref: https://cran.rstudio.com/web/packages/sweep/vignettes/SW01_Forecasting_Time_Series_Groups.html
# Map the Exponential Smoothing ETS (Error, Trend, Seasonal) model function, ets, from the forecast package
tmp_f_fit <-
tmp_f_ts %>%
slice(1:4) %>%
mutate(data_ts =
purrr::map(data_ts, timetk::tk_ts)) %>%
mutate(fit_ets = map(data_ts, forecast::ets))
# View ETS model accuracies
tmp_f_fit %>%
mutate(glance = map(fit_ets, sweep::sw_glance)) %>%
unnest(glance)
## # A tibble: 4 x 17
## installation variable data data_ts fit_ets model.desc sigma logLik
## <chr> <chr> <list<df[,2]> <list> <list> <chr> <dbl> <dbl>
## 1 eglin_afb tmp_f [262,957 x 2] <ts> <ets> ETS(A,Ad,~ 0.910 -1.62e6
## 2 fort_bennin~ tmp_f [262,957 x 2] <ts> <ets> ETS(A,Ad,~ 0.987 -1.64e6
## 3 fort_bliss tmp_f [262,957 x 2] <ts> <ets> ETS(A,Ad,~ 0.854 -1.60e6
## 4 fort_bragg tmp_f [262,957 x 2] <ts> <ets> ETS(A,Ad,~ 1.03 -1.65e6
## # ... with 9 more variables: AIC <dbl>, BIC <dbl>, ME <dbl>, RMSE <dbl>,
## # MAE <dbl>, MPE <dbl>, MAPE <dbl>, MASE <dbl>, ACF1 <dbl>
# Augmented fitted and residual values
augment_fit_ets <- tmp_f_fit %>%
mutate(augment = map(fit_ets, sweep::sw_augment, timetk_idx = TRUE, rename_index = "time")) %>%
unnest(augment)
augment_fit_ets
## # A tibble: 1,051,828 x 9
## installation variable data data_ts fit_ets time
## <chr> <chr> <list<df[,2]> <list> <list> <dttm>
## 1 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 00:00:00
## 2 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 01:00:00
## 3 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 02:00:00
## 4 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 03:00:00
## 5 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 04:00:00
## 6 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 05:00:00
## 7 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 06:00:00
## 8 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 07:00:00
## 9 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 08:00:00
## 10 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 09:00:00
## # ... with 1,051,818 more rows, and 3 more variables: .actual <dbl>,
## # .fitted <dbl>, .resid <dbl>
# Plot the residuals (slow)
# augment_fit_ets %>%
# filter(installation == "eglin_afb") %>%
# ggplot(aes(x = time, y = .resid)) +
# geom_hline(yintercept = 0, color = "grey40") +
# geom_line() +
# geom_smooth(method = "loess") +
# labs(title = "Temperature (°F)",
# subtitle = "ETS Model Residuals") +
# theme_bw()
# Create decompositions
tmp_f_fit %>%
mutate(decomp = map(fit_ets, sweep::sw_tidy_decomp, timetk_idx = TRUE, rename_index = "time")) %>%
unnest(decomp)
## # A tibble: 1,051,828 x 9
## installation variable data data_ts fit_ets time
## <chr> <chr> <list<df[,2]> <list> <list> <dttm>
## 1 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 00:00:00
## 2 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 01:00:00
## 3 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 02:00:00
## 4 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 03:00:00
## 5 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 04:00:00
## 6 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 05:00:00
## 7 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 06:00:00
## 8 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 07:00:00
## 9 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 08:00:00
## 10 eglin_afb tmp_f [262,957 x 2] <ts> <ets> 1990-01-01 09:00:00
## # ... with 1,051,818 more rows, and 3 more variables: observed <dbl>,
## # level <dbl>, slope <dbl>
# Forecasting the model
tmp_f_fit_fcast <- tmp_f_fit %>%
mutate(fcast_ets = map(fit_ets, forecast, h = 336),
sweep = map(fcast_ets, sweep::sw_sweep, fitted = FALSE, timetk_idx = TRUE)) %>%
unnest(sweep)
tmp_f_fit_fcast
## # A tibble: 1,053,172 x 13
## installation variable data data_ts fit_ets fcast_ets
## <chr> <chr> <list<df[,2]> <list> <list> <list>
## 1 eglin_afb tmp_f [262,957 x 2] <ts> <ets> <forecas~
## 2 eglin_afb tmp_f [262,957 x 2] <ts> <ets> <forecas~
## 3 eglin_afb tmp_f [262,957 x 2] <ts> <ets> <forecas~
## 4 eglin_afb tmp_f [262,957 x 2] <ts> <ets> <forecas~
## 5 eglin_afb tmp_f [262,957 x 2] <ts> <ets> <forecas~
## 6 eglin_afb tmp_f [262,957 x 2] <ts> <ets> <forecas~
## 7 eglin_afb tmp_f [262,957 x 2] <ts> <ets> <forecas~
## 8 eglin_afb tmp_f [262,957 x 2] <ts> <ets> <forecas~
## 9 eglin_afb tmp_f [262,957 x 2] <ts> <ets> <forecas~
## 10 eglin_afb tmp_f [262,957 x 2] <ts> <ets> <forecas~
## # ... with 1,053,162 more rows, and 7 more variables: index <dttm>, key <chr>,
## # value <dbl>, lo.80 <dbl>, lo.95 <dbl>, hi.80 <dbl>, hi.95 <dbl>
# Plot forecast
tmp_f_fit_fcast %>%
filter(installation == "eglin_afb",
index > as.Date("2019-05-01")) %>%
ggplot(aes(x = index, y = value, color = key)) +
geom_ribbon(aes(ymin = lo.95, ymax = hi.95),
fill = "#D5DBFF", color = NA, size = 0) +
geom_ribbon(aes(ymin = lo.80, ymax = hi.80, fill = key),
fill = "#596DD5", color = NA, size = 0, alpha = 0.8) +
geom_line() +
labs(title = "Temperature (°F)",
subtitle = "ETS Model Forecasts",
x = "", y = "Temperature (°F)") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Multiple seasonal decomposition
tmp_f_fit_mstl <-
tmp_f_fit %>%
mutate(fit_mstl =
map(data_ts, forecast::mstl))
tmp_f_fit_mstl$fit_mstl[[1]] %>%
autoplot()
